################################################
# ESTIMATE PROPORTION OF JEWS IN LA RÉSISTANCE #
################################################

# Author: Kasia Nalewajko
# First created: 10 November 2022
# Replicated: 10 June 2024

# Start from an empty global environment. Note that this only removes objects;
# it does not detach packages or reset options.
rm(list = ls())

# LOAD PACKAGES -----------------------------------------------------------

# Install each dependency only when it is missing, then attach it
# unconditionally. `require()` followed by `install.packages()` would leave a
# freshly installed package unattached, so the pipe and dplyr/ggplot2 verbs
# used below would not be found on a first run.
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")

library(dplyr)
library(ggplot2)

# LOAD DATA ---------------------------------------------------------------

# Restore the saved resister data. `load()` recreates the objects stored in
# the .Rda file under their saved names (the code further down expects an
# object called `all_resisters` -- presumably it comes from this file; verify).
resisters_path <- "./00 SUBMITTED/00 APSR final/04 replication_files/01 data/resisters_all.Rda"
load(resisters_path)

# Read the INSEE surname table. `header = TRUE` takes column names from the
# first line, `fill = TRUE` pads rows that have fewer fields than the others
# with blanks instead of failing, and `encoding` marks the strings as UTF-8.
surnames_path <- "./00 SUBMITTED/00 APSR final/04 replication_files/01 data/INSEEnoms2008nat_txt.txt"
french_surnames <- read.table(
  file = surnames_path,
  header = TRUE,
  fill = TRUE,
  encoding = "UTF-8"
)

# CLEAN DATA -----------------------------

# Drop columns 7 to 12 by position. Per the original note this keeps only
# records of births until 1940 (assumes the column order of the INSEE file is
# unchanged -- verify if the input file is ever replaced).
french_surnames <- french_surnames[, -c(7:12)]

# Coerce every birth-count column (prefix "X_") to numeric. Going through
# as.character() first makes the conversion safe for factor columns; values
# that are not numbers become NA (R emits a coercion warning for them).
french_surnames <- french_surnames %>%
  mutate(across(starts_with("X_"), ~ as.numeric(as.character(.x))))

# Remove every row that contains an NA.
french_surnames <- na.omit(french_surnames) # delete rows with non-hyphenated compound surnames

# Total births per surname across the retained count columns. The columns are
# picked by the same "X_" prefix used for the numeric coercion above, rather
# than by any name that merely contains an "X".
is_count_col <- startsWith(names(french_surnames), "X_")
french_surnames$births_sum <- rowSums(french_surnames[is_count_col])

# Surname and its total births, most frequent surnames first.
top_french_surnames <- french_surnames %>%
  dplyr::select(NOM, births_sum) %>%
  arrange(desc(births_sum))

# For each percentile of surname prevalence (0%, 1%, ..., 100%), count the
# resisters flagged "Avotaynu" among those whose `births_sum` is at or below
# the corresponding quantile of `top_french_surnames$births_sum`.
#
# Result: `Avotaynu`, one row per percentile with columns `inAvotaynu`, `n`
# (number of matching rows, i.e. individual insurgents) and `quantile` (the
# percentile the count pertains to). A percentile with no matching rows
# contributes no row at all.

sequence <- seq(0, 1, by = 0.01)

# All cut-offs are computed in a single call instead of once per iteration.
cutoffs <- quantile(
  top_french_surnames$births_sum,
  probs = sequence,
  names = FALSE
)

# Build one small summary per percentile and bind them once at the end,
# rather than growing a data frame with rbind() inside a loop.
Avotaynu <- lapply(seq_along(sequence), function(i) {
  all_resisters %>%
    filter(births_sum <= cutoffs[i], inAvotaynu == "Avotaynu") %>%
    count(inAvotaynu, name = "n") %>% # number of included rows (individual insurgents)
    mutate(quantile = sequence[i]) # percentile of surname popularity for this estimate
}) %>%
  bind_rows()

# PLOT ----------------

# Denominator for the secondary (proportion) axis; per the original note this
# is the sum of all insurgents. Defined once so the figure cannot end up with
# two inconsistent copies of the number.
n_insurgents <- 597720

# Scatter plot of the number of insurgents classified as Jewish (left axis)
# against the surname-prevalence percentile. The right axis shows the same
# values rescaled to a proportion of all insurgents.
Avotaynu %>%
  ggplot() +
  geom_point(aes(x = quantile, y = n)) +
  theme_bw() +
  scale_y_continuous(
    sec.axis = sec_axis(
      ~ . / n_insurgents,
      name = "Insurgents classified as Jewish (proportion)"
    )
  ) +
  labs(
    x = "Percentile of included French surnames, by surname prevalence",
    y = "Insurgents classified as Jewish (count)"
  )

# SAVE ------------------

# Write the most recently created ggplot to the online-appendix figures
# folder as a 17 cm x 12 cm PNG at 300 dpi.
figure_dir <- "./00 SUBMITTED/00 APSR final/03 dataverse_online_appendix/figures"

ggsave(
  filename = "D2_matching_avotaynu.png",
  path = figure_dir,
  plot = last_plot(),
  units = "cm",
  width = 17,
  height = 12,
  dpi = 300
)

